import pandas as pd
import numpy as np
import pickle

# Load the full ATP match dataset.
# NOTE(review): `atp_tennis` is not referenced again in this section, and
# `recent_matches` / `top_10_ranked_players_list` are not created here; they
# are presumably defined by earlier code -- confirm before running standalone.
atp_tennis = pd.read_csv('atp_tennis.csv')

# Remove any extra spaces from the player names. An entry may be either a
# plain name string or a sequence whose first element is the name; indexing a
# plain string with [0] would keep only its first character, so strings are
# stripped directly.
top_10_player_names = [
    (player if isinstance(player, str) else player[0]).strip()
    for player in top_10_ranked_players_list
]

# Keep only grass court matches in `recent_matches` that involve at least one
# of the top 10 players (on either side of the match).
# NOTE(review): `recent_matches` presumably already covers the last five
# years -- no date filtering happens in this section.
is_grass = recent_matches['Surface'] == 'Grass'
involves_top_10 = (
    recent_matches['Player_1'].isin(top_10_player_names)
    | recent_matches['Player_2'].isin(top_10_player_names)
)
grass_matches = recent_matches[is_grass & involves_top_10]

# Calculate the number of matches played by each player.
# A player may appear in only one of the two player columns; adding with
# fill_value=0 treats the missing side as zero instead of producing NaN
# (which would otherwise wipe out that player's real count).
# reindex() restricts the result to the top 10 players, in list order, and
# yields 0 for a player with no matches rather than raising KeyError.
matches_played = (
    grass_matches['Player_1'].value_counts()
    .add(grass_matches['Player_2'].value_counts(), fill_value=0)
    .reindex(top_10_player_names, fill_value=0)
    .astype('int64')
)

# Calculate the number of matches won by each player (0 if the player never
# appears in the 'Winner' column of the filtered matches).
matches_won = (
    grass_matches['Winner'].value_counts()
    .reindex(top_10_player_names, fill_value=0)
    .astype('int64')
)

# Calculate the win rate percentage for each player. A player with zero
# matches played gets NaN (0 / 0), i.e. "no data", rather than a made-up rate.
win_rate_percentage = (matches_won / matches_played) * 100

# Create a data table with player names, number of matches played, number of
# matches won, and win rate percentage. The three Series share the same
# player-name index (same order as `top_10_player_names`), so the plain list
# in 'Player' lines up row by row.
win_rate_table = pd.DataFrame({
    'Player': top_10_player_names,
    'Matches Played': matches_played,
    'Matches Won': matches_won,
    'Win Rate Percentage': win_rate_percentage,
})
# Copy of the table with a fresh 0..n-1 index; the printed and pickled object
# below is still `win_rate_table`, which keeps the player-name index.
result = win_rate_table.reset_index(drop=True)

print(win_rate_table)
# Use a context manager so the file handle is flushed and closed
# deterministically. The ./ref_result directory must already exist.
with open("./ref_result/win_rate_table.pkl", "wb") as pickle_file:
    pickle.dump(win_rate_table, pickle_file)